#Replication material for: 
#When Censorship Works: Exploring the Resilience of News Websites to Online Censorship
#British Journal of Political Science
#Author: Philipp M. Lutscher

#This script runs the analyses in the manuscript and appendix 
#Excluding the placebo tests

#Remove objects
rm(list = ls())

#Measure duration
start <- Sys.time()

###Load packages ####
library(tidyverse)
library(lubridate)
library(nlme)
library(xtable)
library(ggpubr)
library(readxl)
library(texreg)


#Load helper functions
source("helper_functions.R")

#Load main datasets
sample_traffic_final_out_egypt <- readRDS("processed/sample_traffic_final_out_egypt.rds")
sample_traffic_final_out_other <- readRDS("processed/sample_traffic_final_out_other.rds")
sample_egypt_censored <- readRDS("processed/sample_egypt_censored.rds")


### Main traffic analyses ####

#Main results
main_plot_a <- plot_traffic(bw = 30,stance_fun = unique(sample_egypt_censored$stance),
             size_fun = unique(sample_egypt_censored$size),
             outcome_fun = "reach_per_million",dataset = sample_egypt_censored) 

main_plot_b <- plot_traffic(bw = 142, stance_fun = unique(sample_egypt_censored$stance),
                            size_fun = unique(sample_egypt_censored$size),
                            outcome_fun = "reach_per_million", dataset = sample_egypt_censored) 

main_plot_c <- plot_traffic_placebo(bw = 30,stance_fun = unique(sample_egypt_censored$stance),
                     size_fun = unique(sample_egypt_censored$size),
                     outcome = "reach_per_million",dataset = sample_egypt_censored,
                     nsims = 10000)

main_plot_d <- plot_traffic_placebo(bw = 142,stance_fun = unique(sample_egypt_censored$stance),
                                    size_fun = unique(sample_egypt_censored$size),
                                    outcome = "reach_per_million",dataset = sample_egypt_censored,
                                    nsims = 10000)

main_plot <- ggarrange(main_plot_a[[1]],main_plot_b[[1]],
                      main_plot_c[[1]], main_plot_d[[1]], ncol = 2, 
                      nrow = 2,labels = c("a","b","c", "d"),
                      font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                      hjust = 0) 
ggsave("output/figures/figure_1_traffic_result.pdf",main_plot,width = 26, height = 16)


### Website closure analyses ####

aggregate_outlets_egypt <- sample_traffic_final_out_egypt %>% mutate(deleted = ifelse(current_status == "closed",1,0)) %>%
  group_by(site) %>% summarize(deleted = max(deleted),
                               audience = unique(`main audience / scope`),
                               size = unique(size),
                               blocked = max(blocked),
                               source_of_funding = unique(source_of_funding),
                               stance = unique(stance),
                               type = unique(activity_type),
                               hq = unique(`country where the office / team is based`),
                               date = unique(estimated_date_of_determining_blocking),
                               threat = unique(`other threat`),
                               blocked_type = unique(blocking))

#Recode some of the variables
aggregate_outlets_egypt <- aggregate_outlets_egypt %>% mutate(type = ifelse(type %in% c("TV","Radio"),"TV",type),
                                                              source_of_funding = ifelse(source_of_funding %in% c("State-backed","State-owned"),"State",source_of_funding),
                                                              hq = ifelse(hq == "Egypt","Egypt","other"),
                                                              date = as.Date(date),
                                                              date_mont = paste0(months(date),year(date)),
                                                              threat = ifelse(threat == is.na(threat), 0, 1),
                                                              blocked_robust = ifelse(blocked_type == "blocked", 1, 0))

aggregate_outlets_egypt$threat[is.na(aggregate_outlets_egypt$threat)] <- 0   


aggregate_outlets_egypt <- aggregate_outlets_egypt %>% mutate(date_mont = fct_relevel(aggregate_outlets_egypt$date_mont,"NANA"))

#Number of outlets that became inactive
nrow(filter(aggregate_outlets_egypt,deleted == 1)) #40

m1 <- lm(deleted~blocked,aggregate_outlets_egypt)
m2 <- lm(deleted~blocked+size+source_of_funding+stance+type+hq,aggregate_outlets_egypt)
m3 <- lm(deleted~blocked+size+source_of_funding+stance+type+hq+threat,aggregate_outlets_egypt)
m4 <- lm(deleted~blocked_robust+size+source_of_funding+stance+type+hq,aggregate_outlets_egypt)

#Model for Appendix
m_a <- lm(deleted~date_mont+size+source_of_funding+stance+type+hq,aggregate_outlets_egypt)

htmlreg(list(m1,m2,m3,m4),custom.coef.map = list('(Intercept)' = "Intercept",'blocked' = "Blocked",
                                                'blocked_robust' = "Blocked (permanent)",
                                                'sizeBelow median' = "Pre-May 2017 traffic: below median)",
                                                'source_of_fundingPrivate' = "Funding: private",
                                                'source_of_fundingState' = "Funding: state",
                                                'stanceMuslim opposition' = "Stance: Islamist opposition",
                                                'stanceOther sectarian' = "Stance: other sectarian",
                                                'stancePro-government' = "Stance: pro-government",
                                                'stanceSports/entertainment' = "Stance: sports/entertainment",
                                                'typeNews Agency' = "Type: news agency",
                                                'typePeriodical Newspaper' = "Type: periodical newspaper",
                                                'typePeriodical Newspaper (formerly)' = "Type: periodical newspaper (formerly)",
                                                'typeTV' = "Type: TV/radio",'hqother' = "HQ: Outside Egypt",
                                                'threat' = "Non-digtal threat"), file = "output/tables/table2_closure.html")

#Table for Appendix F
htmlreg(m_a, custom.coef.map = list('(Intercept)' = "Intercept", 
                                   'date_montAugust2017' = "Blocked (August 2017)",
                                   'date_montJanuary2019' = "Blocked (January 2019",
                                   'date_montJuly2017' = "Blocked (July 2017)",
                                   'date_montJune2017' = "Blocked (June 2017)",
                                   'date_montJune2018' = "Blocked (June 2018)",
                                   'date_montMarch2018' = "Blocked (March 2018)",
                                   'date_montMay2017' = "Blocked (May 2017)",
                                   'date_montMay2019' = "Blocked (May 2019)",
                                   'date_montSeptember2017' = "Blocked (September 2018)",
                                   'sizeBelow median' = "Pre-May 2017 traffic: below median)",
                                   'source_of_fundingPrivate' = "Funding: private",
                                   'source_of_fundingState' = "Funding: state",
                                   'stanceMuslim opposition' = "Stance: Islamist opposition",
                                   'stanceOther sectarian' = "Stance: other sectarian",
                                   'stancePro-government' = "Stance: pro-government",
                                   'stanceSports/entertainment' = "Stance: sports/entertainment",
                                   'typeNews Agency' = "Type: news agency",
                                   'typePeriodical Newspaper' = "Type: periodical newspaper",
                                   'typePeriodical Newspaper (formerly)' = "Type: periodical newspaper (formerly)",
                                   'typeTV' = "Type: TV/radio",'hqother' = "HQ: Outside Egypt",
                                   'threat' = "Non-digtal threat"), file = "output/tables/table_F1_closure_time.html")


### Robustness and heterogeneity test for traffic analysis (Appendix D) ####

#### Leaving out outlets that were to be found potentially unblocked ####
unblocked_plot_a <- plot_traffic(bw = 30, stance_fun = unique(sample_egypt_censored$stance),
                            size_fun = unique(sample_egypt_censored$size),
                            outcome_fun = "reach_per_million",dataset = filter(sample_egypt_censored, blocking == "blocked")) 

unblocked_plot_b <- plot_traffic(bw = 142, stance_fun = unique(sample_egypt_censored$stance),
                            size_fun = unique(sample_egypt_censored$size),
                            outcome_fun = "reach_per_million",dataset = filter(sample_egypt_censored, blocking == "blocked")) 

unblocked_plot_c <- plot_traffic_placebo(bw = 30, stance_fun = unique(sample_egypt_censored$stance),
                                    size_fun = unique(sample_egypt_censored$size),
                                    outcome = "reach_per_million",dataset = filter(sample_egypt_censored, blocking == "blocked"),
                                    nsims = 10000)

unblocked_plot_d <- plot_traffic_placebo(bw = 142, stance_fun = unique(sample_egypt_censored$stance),
                                    size_fun = unique(sample_egypt_censored$size),
                                    outcome = "reach_per_million",dataset = filter(sample_egypt_censored, blocking == "blocked"),
                                    nsims = 10000)

unblocked_plot <- ggarrange(unblocked_plot_a[[1]],unblocked_plot_b[[1]],
                            unblocked_plot_c[[1]], unblocked_plot_d[[1]], ncol = 2, 
                       nrow = 2,labels = c("a","b","c", "d"),
                       font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                       hjust = 0) 

ggsave("output/figures/figure_D3_traffic_unblocked.pdf",unblocked_plot,width = 26, height = 16)

#### Different outcomes ####
plot_traffic_sens <- function(outcome = outcome){
  main_plot_a <- plot_traffic(bw = 30,stance_fun = unique(sample_egypt_censored$stance),
                              size_fun = unique(sample_egypt_censored$size),
                              outcome_fun = outcome,dataset = sample_egypt_censored) 
  
  main_plot_b <- plot_traffic(bw = 142,stance_fun = unique(sample_egypt_censored$stance),
                              size_fun = unique(sample_egypt_censored$size),
                              outcome_fun = outcome,dataset = sample_egypt_censored) 
  
  main_plot_c <- plot_traffic_placebo(bw = 30,stance_fun = unique(sample_egypt_censored$stance),
                                      size_fun = unique(sample_egypt_censored$size),
                                      outcome_fun = outcome,dataset = sample_egypt_censored,
                                      nsims = 10000)
  
  main_plot_d <- plot_traffic_placebo(bw = 142,stance_fun = unique(sample_egypt_censored$stance),
                                      size_fun = unique(sample_egypt_censored$size),
                                      outcome_fun = outcome,dataset = sample_egypt_censored,
                                      nsims = 10000)
  main_plot <- ggarrange(main_plot_a[[1]],main_plot_b[[1]],
                         main_plot_c[[1]], main_plot_d[[1]], ncol = 2, 
                         nrow = 2,labels = c("a","b","c", "d"),
                         font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                         hjust = 0) 
  return(list(main_plot,main_plot_a[[2]],main_plot_b[[2]],main_plot_c[[3]],
              main_plot_c[[4]],main_plot_d[[3]],main_plot_d[[4]]))
}

outcomes <- c("page_views_per_million","reach_per_million3","reach_per_million14","reach_per_million_004")

sens <- lapply(outcomes, plot_traffic_sens)

ggsave("output/figures/figure_D4_page_views.pdf",sens[[1]][[1]],width = 26, height = 16)

#Combine remaining sensitive results in table
sens_table <- rbind(
          cbind(sens[[2]][[4]],sens[[2]][[5]],sens[[2]][[6]],sens[[2]][[7]]),
          cbind(sens[[3]][[4]],sens[[3]][[5]],sens[[3]][[6]],sens[[2]][[7]]),
          cbind(sens[[4]][[4]],sens[[4]][[5]],sens[[4]][[6]],sens[[2]][[7]]))

colnames(sens_table) <- c("Average change (1 month)","p-value","Average change (5 monts)","p-value")
rownames(sens_table) <- c("Linear imputation (3 days)","Linear imputation (14 days)","Replacment of missing values with 0.04")

print(xtable(sens_table),include.rownames = TRUE,
      type = "html",file = "output/tables/table_D1_sensivity.html")

#### Heterogeneity tests (size and stance) ####

plot_traffic_hetero <- function(stance=unique(sample_egypt_censored$stance), size_fun = unique(sample_egypt_censored$size)){
  main_plot_a <- plot_traffic(bw = 30,stance_fun = stance,
                              size_fun = size_fun,
                              outcome = "reach_per_million", dataset = sample_egypt_censored) 
  
  main_plot_b <- plot_traffic(bw = 142,stance_fun = stance,
                              size_fun = size_fun,
                              outcome = "reach_per_million",dataset = sample_egypt_censored) 
  
  main_plot_c <- plot_traffic_placebo(bw = 30,stance_fun = stance,
                                      size_fun = size_fun,
                                      outcome = "reach_per_million", dataset = sample_egypt_censored,
                                      nsims = 10000)
  
  main_plot_d <- plot_traffic_placebo(bw = 142,stance_fun = stance,
                                      size_fun = size_fun,
                                      outcome = "reach_per_million", dataset = sample_egypt_censored,
                                      nsims = 10000)
  main_plot <- ggarrange(main_plot_a[[1]],main_plot_b[[1]],
                         main_plot_c[[1]], main_plot_d[[1]], ncol = 2, 
                         nrow = 2,labels = c("a","b","c", "d"),
                         font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                         hjust = 0.025) 
  return(list(main_plot,main_plot_a[[2]],main_plot_b[[2]]))
}

stances <- lapply(unique(sample_egypt_censored$stance), plot_traffic_hetero,size_fun = unique(sample_egypt_censored$size))
ggsave("output/figures/figure_D5_islamist.pdf",stances[[1]][[1]],width = 26, height = 16)
ggsave("output/figures/figure_D6_independet.pdf",stances[[2]][[1]],width = 26, height = 16)
ggsave("output/figures/figure_D7_progov.pdf",stances[[3]][[1]],width = 26, height = 16)
ggsave("output/figures/figure_D8_sportsenter.pdf",stances[[4]][[1]],width = 26, height = 16)

sizes <- lapply(unique(sample_egypt_censored$size), plot_traffic_hetero,stance = unique(sample_egypt_censored$stance))
ggsave("output/figures/figure_D10_below.pdf",sizes[[1]][[1]],width = 26, height = 16)
ggsave("output/figures/figure_D9_above.pdf",sizes[[2]][[1]],width = 26, height = 16)

#Donut regression
plot_traffic_donot <- function(donut){
  main_plot_a <- plot_traffic(bw = 30, stance_fun = unique(sample_egypt_censored$stance),
                              size_fun = unique(sample_egypt_censored$size),
                              outcome_fun = "reach_per_million",dataset = sample_egypt_censored,donut = donut) 
  
  main_plot_b <- plot_traffic(bw = 142,stance_fun = unique(sample_egypt_censored$stance),
                              size_fun = unique(sample_egypt_censored$size),
                              outcome_fun = "reach_per_million",dataset = sample_egypt_censored,donut = donut) 
  
  main_plot_c <- plot_traffic_placebo(bw = 30,stance_fun = unique(sample_egypt_censored$stance),
                                      size_fun = unique(sample_egypt_censored$size),
                                      outcome_fun = "reach_per_million",dataset = sample_egypt_censored,
                                      nsims = 10000,donut = donut)
  
  main_plot_d <- plot_traffic_placebo(bw = 142,stance_fun = unique(sample_egypt_censored$stance),
                                      size_fun = unique(sample_egypt_censored$size),
                                      outcome_fun = "reach_per_million",dataset = sample_egypt_censored,
                                      nsims = 10000,donut = donut)
  
  main_plot <- ggarrange(main_plot_a[[1]],main_plot_b[[1]],
                         main_plot_c[[1]], main_plot_d[[1]], ncol = 2, 
                         nrow = 2,labels = c("(a)","(b)","(c)", "(d)"),
                         font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                         hjust = 0.025) 
  return(list(main_plot,main_plot_a[[2]],main_plot_b[[2]]))
}


plot_donut <- plot_traffic_donot(donut = TRUE)
ggsave("output/figures/figure_D2_donut.pdf",plot_donut[[1]],width = 26, height = 16)

#### ITS models ####

run_its_models <- function(bw,dataset){
  lme_out <- lme(reach_per_million~before_after_time*blocked,random = ~1+blocked|site,correlation = corAR1(form = ~ 1|site),
                 filter(dataset,before_after_time >= -bw & before_after_time <= bw & before_after_time != 0))
}

main_its_models <- lapply(c(30,142,536),run_its_models,dataset = sample_egypt_censored)


htmlreg(list(main_its_models[[1]],main_its_models[[2]],main_its_models[[3]]),
       custom.coef.map = list('(Intercept)' = "Baseline",'blocked' = "Blocked (level change)",
                              'before_after_time' = "Pre-censorship trend",
                              'before_after_time:blocked' = "Blocked (slope change)"),
       file = "output/tables/table_D2_its.html")

### Substitution effects (Appendix E) ####


spillover_agg <- filter(sample_traffic_final_out_egypt,date <= "2017-06-23") %>% group_by(site) %>%
  summarize(blocked = max(blocked)) %>% filter(blocked == 1)

sample_traffic_censored_substitute_indi <- filter(sample_traffic_final_out_egypt,!(site %in% spillover_agg$site) & 
                                                    date <= "2017-06-23" & date >= "2017-04-24") %>% 
  mutate(before_after_time = date - as.Date("2017-05-24"))

sample_traffic_censored_substitute <- sample_traffic_censored_substitute_indi  %>% 
  group_by(before_after_time,stance) %>% summarize(reach_per_million = mean(reach_per_million,na.rm = T),
                                                   page_views_per_million = mean(page_views_per_million,na.rm = T),
                                                   page_views_per_user = mean(page_views_per_user,na.rm = T)
  )

sample_traffic_censored_substitute <- sample_traffic_censored_substitute %>% mutate(blocked = ifelse(before_after_time >= 0,"Blocked","Not blocked"))

highlight_df <- sample_traffic_censored_substitute %>% 
  filter(before_after_time == 0)


substitution_plot <- ggplot(filter(sample_traffic_censored_substitute,before_after_time != 0),
                            aes(before_after_time,reach_per_million,group = blocked)) + 
  geom_point(colour = "gray40", size = 3) + 
  geom_smooth(method = "loess", se = T, colour = "black", fill = "black", span = .5, size = 2) +  
  geom_vline(xintercept = 0, linetype = 2) + 
  labs(y = "Average traffic per million", x = "Days pre-censorship and post-censorship") + 
  annotate("text", label = "Blocking date", x = 0, y = Inf, vjust = 2, hjust = 1, size = 13) +
  geom_point(data = highlight_df, 
             aes(before_after_time,reach_per_million), 
             color = 'red',
             size = 3) +
  theme_minimal() +
  theme(text = element_text(size = 30), panel.grid = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.spacing = unit(0, "in"),
        plot.margin = unit(c(5.5, 5.5, 5.5, 5.5), "points"),
        strip.text = element_text(size = 40)) + 
   facet_wrap(~stance,scales = "free") +
  theme( legend.position = "none")

ggsave(file = "output/figures/figure_E1_substitution.pdf",substitution_plot,width = 32, height = 16)



sample_traffic_censored_substitute_indi <- substitute_placebo(outcome_fun = "reach_per_million",data = filter(sample_traffic_censored_substitute,stance == "Independent"))

sample_traffic_censored_substitute_opp <- substitute_placebo(outcome_fun = "reach_per_million",data = filter(sample_traffic_censored_substitute,stance == "Islamist opposition"))

sample_traffic_censored_substitute_sec <- substitute_placebo(outcome_fun = "reach_per_million",data = filter(sample_traffic_censored_substitute,stance == "Other sectarian"))

sample_traffic_censored_substitute_pro <- substitute_placebo(outcome_fun = "reach_per_million",data = filter(sample_traffic_censored_substitute,stance == "Pro-government"))

sample_traffic_censored_substitute_sports <- substitute_placebo(outcome_fun = "reach_per_million",data = filter(sample_traffic_censored_substitute,stance == "Sports/entertainment"))


subsitution_plot_placebo <- ggarrange(sample_traffic_censored_substitute_indi[[1]] + ggtitle("Independent"),
                                      sample_traffic_censored_substitute_opp[[1]] + ggtitle("Islamist opposition"),
                      sample_traffic_censored_substitute_sec[[1]] + ggtitle("Other sectarian"),
                      sample_traffic_censored_substitute_pro[[1]] + ggtitle("Pro-government"),
                      sample_traffic_censored_substitute_sports[[1]] + ggtitle("Sports/entertainment"), ncol = 3, 
                      nrow = 2,labels = c("a","b","c", "d", "e"),
                      font.label = list(size = 34, color = "black", face = "bold", family = NULL),
                      hjust = 0) 
ggsave("output/figures/figure_E2_substitution_placebo.pdf",subsitution_plot_placebo,width = 32, height = 16)

### Newspaper Evasion analysis (Appendix G) ####

#### Changed Domain names ####
changed_names <- filter(sample_traffic_final_out_egypt,(site %in% 
                                                          c(unique(sample_traffic_final_out_egypt$site)[126:145])) &
                          blocking != "not blocked")

changed_names <- ggplot(changed_names
                        ,aes(date,page_views_per_million,col = as.factor(blocked))) + geom_smooth() + geom_point(alpha = 0.3) +
  facet_wrap(~site,scales = "free_y") + ylab("Traffic per million") + xlab("Date") +
  theme_bw(base_size = 16) + theme( legend.position = "none")


ggsave(file = "output/figures/figure_G1_changed_domains.pdf", changed_names, width = 14, height = 10)

madamasr <- ggplot(filter(sample_traffic_final_out_egypt,site == "madamasr.com")
                   ,aes(date,page_views_per_million,col = as.factor(blocked))) + geom_smooth() + geom_point(alpha = 0.3) +
  ylab("Traffic per million") + xlab("Date") + geom_vline(xintercept = as.Date("2017-05-24"),linetype = "dashed",lwd = 1,alpha = 0.3) +
  theme_bw(base_size = 16) + theme( legend.position = "none")

ggsave(file = "output/figures/figure_G2_madamasr.pdf",madamasr,width = 9, height = 7)

#### Social media analysis ####

#Results are discussed mainly "in-text" (see pp.23f)
media_egypt_list <- read_excel("input/media_egypt_list.xlsx")
social_media_cleaned <- filter(media_egypt_list,!is.na(social_media_active) & media_egypt_list$`main audience / scope` == "Egypt")
social_media_cleaned <- social_media_cleaned %>% mutate(blocked = ifelse(blocking != "not blocked","blocked","not blocked")) 
social_media_cleaned %>% group_by(blocked,social_media_active) %>% count()
#Note: Data on follower count only collected for Egyptian outlets with social media account

#Average and standard deviation for blocked
median(filter(social_media_cleaned,blocked == "blocked")$number_followers_high,na.rm = T)
sd(filter(social_media_cleaned,blocked == "blocked")$number_followers_high,na.rm = T)

#Avg and sd for not blocked
median(filter(social_media_cleaned,blocked == "not blocked")$number_followers_high,na.rm = T)
sd(filter(social_media_cleaned,blocked == "not blocked")$number_followers_high,na.rm = T)

#Investigate changes in Twitter followers
social_media_cleaned <- social_media_cleaned %>% mutate(change = (number_followers_twitter - number_followers_twitter_2017) /
                                                          number_followers_twitter_2017*100)

#Number of outlets for which changes could be explored
summary(!is.na(filter(social_media_cleaned,social_media_active == "yes")$change))

#Median change and standard deviation for blocked outlets
median(filter(social_media_cleaned,blocked == "blocked")$change,na.rm = T)
sd(filter(social_media_cleaned,blocked == "blocked")$change,na.rm = T)

#Median change and standard deviation for not blocked outlets
median(filter(social_media_cleaned,blocked == "not blocked")$change,na.rm = T)
sd(filter(social_media_cleaned,blocked == "not blocked")$change,na.rm = T)

#Exploration of changes close to the blocking date for the three outlets, 
#for which this information could be retrieved 9 (Table G.1)

domain_name <- c("madamar.com","dailynewsegypt.com","alborsanews.com")
censorship_date <- c("2017-05-24","2017-05-26","2017-05-28")
pre_followers <- c("48,500","230,000","39,200")
measurement_1 <- c("2017-03-25","2017-05-25","2017-04-22")
post_followers <- c("62,000","231,000","39,800")
measurement_2 <- c("2017-06-23","2017-06-01","2017-07-01")

closer_change_out <- as.data.frame(cbind(domain_name,censorship_date,pre_followers,measurement_1,post_followers,measurement_2))
print(xtable(closer_change_out),type = "html",
      file = "output/tables/table_G1_close_change.html",include.rownames = FALSE)


### International and regional outlets (Appendix H) ####

sample_other_censored <- filter(sample_traffic_final_out_other) %>% 
  mutate(before_after_time = date - as.Date(estimated_date_of_determining_blocking),
         blocked = ifelse(before_after_time >= 0, 1, 0),
         week = factor(lubridate::week(date)),
         weekday = factor(lubridate::wday(date)),
         month = factor(lubridate::month(date)),
         year = factor(lubridate::year(date)),
         stance = factor(stance),
         size = factor(size),
         site = factor(site)) %>% mutate(blocked = ifelse(!is.na(blocked),blocked,0))

sample_other_censored_agg <- sample_other_censored %>% mutate(deleted = ifelse(current_status == "closed",1,0)) %>%
  group_by(site) %>%
  summarise(deleted = max(deleted),
          audience = unique(`main audience / scope`),
          size = unique(size),
          blocked = max(blocked),
          source_of_funding = unique(source_of_funding),
          stance = unique(stance),
          type = unique(activity_type),
          hq = unique(`country where the office / team is based`))

sample_other_censored_agg <- sample_other_censored_agg %>% mutate(source_of_funding = ifelse(source_of_funding == "State-backed","State-owned",source_of_funding),
                                                                  type = ifelse(type == "Radio","TV",type))


#Number of outlets that became inactive
nrow(filter(sample_other_censored_agg,deleted == 1)) #6

m3 <- lm(deleted~blocked,sample_other_censored_agg)
m4 <- lm(deleted~blocked+size+source_of_funding+stance+type+hq,sample_other_censored_agg)

#Do deletion analysis extra
htmlreg(list(m3,m4),custom.coef.map = list('(Intercept)' = "Intercept",'blocked' = "Blocked",
                                          'sizeBelow median' = "Pre-May 2017 traffic: below median)",
                                          'source_of_fundingPrivate' = "Funding: private",
                                          'source_of_fundingState-owned' = "Funding: state",
                                          'stanceMuslim opposition' = "Stance: Islamist opposition",
                                          'stanceOther sectarian' = "Stance: other sectarian",
                                          'stancePro-government' = "Stance: pro-government",
                                          'stanceSports/entertainment' = "Stance: sports/entertainment",
                                          'typeNews Agency' = "Type: news agency",
                                          'typeTV' = "Type: TV/radio",'hqGermany' = "HQ: Germany",
                                          'hqIran' = "HQ: Iran",'hqIraq' = "HQ: Iraq",
                                          'hqJordan' = "HQ: Jordan",'hqLebanon' = "HQ: Lebanon",
                                          'hqPalestine' = "HQ: Palestine",'hqQatar' = "HQ: Qatar",
                                          'hqRussia' = "HQ: Russia",'hqSaudi Arabia' = "HQ: Saudi Arabia",
                                          'hqSouth Africa' = "HQ: South Africa",'hqTunisia' = "HQ: Tunisia",
                                          'hqTurkey' = "HQ: Turkey",'hqUAE' = "HQ: UAE",
                                          'hqUK' = "HQ: UK",'hqUnknown' = "HQ: Unknown",
                                          'hqUSA' = "HQ: USA",'hqYemen' = "HQ: Yemen"),
       file = "output/tables/tableE1_likelihood.html")


#Run traffic analyses

sample_other_censored <- filter(sample_other_censored,!is.na(before_after_time))



main_plot_ao <- plot_traffic(bw = 30,stance_fun = unique(sample_other_censored$stance),
                            size_fun = unique(sample_other_censored$size),
                            outcome_fun = "reach_per_million",dataset = sample_other_censored) 

main_plot_bo <- plot_traffic(bw = 142,stance_fun = unique(sample_other_censored$stance),
                            size_fun = unique(sample_other_censored$size),
                            outcome_fun  =  "reach_per_million",dataset = sample_other_censored) 

main_plot_co <- plot_traffic_placebo(bw = 30,stance_fun = unique(sample_other_censored$stance),
                                    size_fun = unique(sample_other_censored$size),
                                    outcome = "reach_per_million",dataset = sample_other_censored,
                                    nsims = 10000)

main_plot_do <- plot_traffic_placebo(bw = 142,stance_fun = unique(sample_other_censored$stance),
                                    size_fun = unique(sample_other_censored$size),
                                    outcome = "reach_per_million",dataset = sample_other_censored,
                                    nsims = 10000)

main_plot_other <- ggarrange(main_plot_ao[[1]],main_plot_bo[[1]],
                       main_plot_co[[1]], main_plot_do[[1]], ncol = 2, 
                       nrow = 2,labels = c("a","b","c", "d"),
                       font.label = list(size = 30, color = "black", face = "bold", family = NULL),
                       hjust = 0) 

ggsave("output/figures/figure_H3_international.pdf",main_plot_other,width = 26, height = 16)

#Measure duration
print(Sys.time() - start)